import pandas as pd
from scipy.stats import kendalltau

# Load the dataset
df = pd.read_csv('category_1.csv')

# Optional brand filter: keep only rows whose brand name contains "shoes"
# (case-insensitive; rows with a missing brand name are dropped).
# Disable this filter when the analysis should not be restricted by brand.
# The explicit copy keeps the filtered frame independent of `df`.
framing_df = df.loc[
    df['brand_name'].str.contains('shoes', case=False, na=False)
].copy()

# Numerical encoding of the item categories
x_mapping = {
    "negative": -1,
    "positive": 1,
}

# Function to map responses to numerical values based on conditions, with additional data cleaning
def y_mapping(row):
    """Score a single response row as -1, 0 or 1.

    The text fields are compared after stripping surrounding whitespace and
    lower-casing them. A response equal to ``stereotype`` scores -1 for a
    'negative' item and 1 for a 'positive' item; a response equal to
    ``anti_stereotype`` scores the opposite sign. Every other row scores 0.

    Args:
        row: Mapping-like record (for example a DataFrame row) providing the
            keys 'item_category', 'stereotype', 'anti_stereotype' and
            'response'. Values are expected to be strings or missing
            (NaN/None).

    Returns:
        int: -1, 0 or 1.
    """
    # Clean and prepare data: missing values become None, text is normalised.
    item_category, stereotype, anti_stereotype, response = (
        row[key].strip().lower() if pd.notna(row[key]) else None
        for key in ('item_category', 'stereotype', 'anti_stereotype', 'response')
    )

    # A missing response can never be a match. Without this guard a missing
    # response compared equal to a missing stereotype/anti-stereotype
    # (None == None) and the row was scored as if the respondent had answered.
    if response is None or item_category not in ('negative', 'positive'):
        return 0

    # Sign of a stereotype-consistent answer for this item category.
    stereotype_score = 1 if item_category == 'positive' else -1

    if response == stereotype:
        return stereotype_score
    if response == anti_stereotype:
        return -stereotype_score
    return 0

# Paired observations for the correlation: x encodes the item category,
# y encodes how the response relates to the stereotype.
x = []
y = []

# Filter data by type_category; replace with 'type_2' when working with the type 2 category
temp_data = framing_df[framing_df['type_category'] == 'type_1']

for _, row in temp_data.iterrows():
    # item_category may be missing; calling .strip() on a NaN would raise
    # AttributeError. y_mapping already tolerates missing values, so do the
    # same here and fall back to the neutral value 0.
    raw_category = row['item_category']
    category_key = raw_category.strip().lower() if pd.notna(raw_category) else None
    x_value = x_mapping.get(category_key, 0)
    y_value = y_mapping(row)

    x.append(x_value)
    y.append(y_value)

# Ensure there is enough variation in y for valid statistical analysis
# NOTE(review): only y is checked; a constant x is not guarded against —
# confirm whether that case needs separate handling.
if len(set(y)) > 1:
    tau, p_value = kendalltau(x, y, method="asymptotic", variant='c')
    print('Total data: ', len(x))
    print(f"Kendall's Tau Correlation for type 1: {tau}")
    print(f"P-Value: {p_value}")
else:
    print("Not enough variation in 'y' for valid statistical analysis.")
